# the code to generate the event study plots 

# Load necessary libraries
library(dplyr)
library(fixest)
library(ggplot2)
library(stringr)
library(broom)  # for tidying model output

#CHOOSE sample size
sample_choice <- "10_percent_sample"

#CHOOSE balance requirement of the panel ("choice can be: unbalanced", "pre_post_1_t" or "pre_post_3_t")
suffix_mild_balanced = "pre_post_3_t"

#-----------------------------------------------------------#
#Load dataset
df <- readRDS(paste0("data/derived/mild_balanced_", suffix_mild_balanced,"_stacked_nonpolicy_firm_payroll_provider_dataset_", sample_choice, ".rds"))

df <- df %>% 
  mutate(super_gap_std = as.vector(scale(super_gap)),
         T_std = as.vector(scale(T)))

#Define set of outcome variables of interest
outcome_var_vector = c("ln_avg_wage_exact",
                       "share_wage_ltmw",
                       "share_wage_gtmw"
)

#Define set of treatment variables of interest
treat_var_vector = c("T_std",
                     "super_gap_std"
)

#Define set with all the event "identifiers": we'll also run the event-study separately for each of the events
events_ids = c(unique(df$trt_exp))


#Loop over different outcome and treatment variables
for(y in outcome_var_vector){
  for(t in treat_var_vector){
    
    #----------------------#  
    #### Stacked events ####
    #----------------------#
    #Define formula for regression (this is necessary to loop over different dependent variables)
    formula = as.formula(paste(y, "~ i(etime,", t, ", ref = -1) | 
                         clt_client^factor(trt_exp) + czone^trt_exp + etime^factor(trt_exp)"))
    
    #Run regression
    fit = fixest::feols(formula, cluster = ~czone, data = df)
    
    #Extract coefficients and SEs
    coefs = coef(fit)
    ses = se(fit)
    
    pre_treatment_coefs = coefs[grep("etime::[-]", names(coefs))]
    avg_pre_treatment_coefs_t_y <- sum(pre_treatment_coefs) / 12
    # 
    post_treatment_coefs = coefs[grep("etime::[^-]", names(coefs))]
    avg_post_treatment_coefs_t_y <- sum(post_treatment_coefs) / 11
    # 
    
    pre_treatment_se <- ses[grepl("etime::[-]", names(ses)) ]
    avg_se_pre_treatment_t_y <- sum(pre_treatment_se) / sqrt(12)
    
    post_treatment_se <- ses[grepl("etime::[^-]", names(ses))]
    avg_se_post_treatment_t_y <- sum(post_treatment_se) / sqrt(11)
    
    #Put results into a dataframe
    results = data.frame(
      interaction = names(coefs),
      estimate = coefs,
      se = ses,
      row.names = NULL) %>%
      #Bind with omitted interaction variable: event time -1
      rbind(data.frame(interaction = "etime::-1:T",
                       estimate = 0,
                       se = 0)) %>%
      mutate(etime = as.numeric(str_extract(interaction, "-?\\d+")), #extract event time numbers
             ci_upper = estimate + 1.96*se,
             ci_lower = estimate - 1.96*se) %>%
      arrange(etime)
    
    #Plot results
    plot = ggplot(results,
                  aes(x = etime, y = estimate)) +
      geom_errorbar(aes(ymin = ci_lower,
                        ymax = ci_upper),
                    color = "blue") +
      geom_point(color = "red",
                 size = 2) +
      geom_hline(yintercept = 0, 
                 linetype = "dashed") +
      geom_vline(xintercept = -.5,
                 linetype = "dashed",
                 color = "red") +
      scale_x_continuous(limits = c(-6, 5),
                         breaks = seq(-6, 5, 1)) + 
      scale_y_continuous(limits = c(-0.05, 0.05),
                         breaks = seq(-.05, .05, .025)) +
      theme_minimal() +
      theme(#panel.grid.major = element_blank(),
        panel.background = element_blank(),
        text = element_text(size = 13),
        axis.text = element_text(colour = "black", size = 13)
      ) +
      labs(x = "Months relative to event",
           y = "Estimated coefficients",
           #title = paste0("Outcome variable: ", y, ". Treatment variable: ", t),
           #subtitle = paste("Stacked events. Balance requirement:", suffix_mild_balanced),
           #caption = paste("Sample size:", sample_choice)
      )
    
    print(plot)
    
    #Save output
    ggsave(plot,
           device="pdf",
           width=12,
           height=7,
           filename = paste0("/figures_tables/event_study_12month", suffix_mild_balanced, "_stacked_", sample_choice,"_outcome_", y, "_treat_var_", t, ".pdf"))
    
    #Plot results
    plot = ggplot(results,
                  aes(x = etime, y = estimate)) +
      geom_errorbar(aes(ymin = ci_lower,
                        ymax = ci_upper),
                    color = "blue") +
      geom_point(color = "red",
                 size = 2) +
      geom_hline(yintercept = 0, 
                 linetype = "dashed") +
      geom_vline(xintercept = -.5,
                 linetype = "dashed",
                 color = "red") +
      scale_x_continuous(limits = c(-12, 11),
                         breaks = seq(-12, 11, 1)) + 
      scale_y_continuous(limits = c(-0.05, 0.05),
                         breaks = seq(-.05, .05, .025)) +
      theme_minimal() +
      theme(#panel.grid.major = element_blank(),
        panel.background = element_blank(),
        text = element_text(size = 13),
        axis.text = element_text(colour = "black", size = 13)
      ) +
      labs(x = "Months relative to event",
           y = "Estimated coefficients",
           #title = paste0("Outcome variable: ", y, ". Treatment variable: ", t),
           #subtitle = paste("Stacked events. Balance requirement:", suffix_mild_balanced),
           #caption = paste("Sample size:", sample_choice)
      )
    
    print(plot)
    
    #Save output
    ggsave(plot,
           device="pdf",
           width=12,
           height=7,
           filename = paste0("output/figures_tables/event_study_24month", suffix_mild_balanced, "_stacked_", sample_choice,"_outcome_", y, "_treat_var_", t, ".pdf"))
    
  }
}
